import pandas as pd
%matplotlib inline
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import plotly
import plotly.graph_objs as go
import math
import scipy.optimize as optimize
# Load the measurements table; the CSV column named 'Index' becomes the row index.
data = pd.read_csv('weights_heights.csv', index_col='Index')
data.head()

# Draw the distributions of both columns on the same axes (Weight in red).
# NOTE(review): `distplot` is deprecated in recent seaborn releases — confirm
# the installed version before migrating to `histplot`/`displot`.
ax = sns.distplot(data['Height'])
ax = sns.distplot(data['Weight'], color="r")

# Derived column: weight divided by the square of height.
# NOTE(review): the units of 'Weight' and 'Height' are not visible here —
# confirm them before interpreting this as a conventional BMI value.
data['BMI'] = data['Weight'] / data['Height'].pow(2)
data.head(3)

# Pairwise regression plots with KDE curves on the diagonal.
# NOTE(review): newer seaborn releases call this parameter `height`;
# `size` is kept here for compatibility with the version the notebook used.
g = sns.pairplot(data, kind="reg", diag_kind='kde', size=4)

# Bucket every row by weight: 1 below 120, 2 from 120 up to (not including)
# 150, and 3 for everything else.
data['weight_category'] = [
    1 if weight < 120 else 2 if weight < 150 else 3
    for weight in data.Weight
]
ax = sns.boxplot(x="weight_category", y="Height", data=data)

# The column names are passed by keyword: newer seaborn releases no longer
# accept x/y positionally, while the keyword form works on old releases too.
g = sns.jointplot(x="Weight", y="Height", data=data, kind="kde", space=0, color="g")
def error(x, y, w0, w1):
    """Return the sum of squared residuals of the line ``w0 + w1 * x``.

    Args:
        x: One-dimensional sequence of inputs.
        y: One-dimensional sequence of targets, same length as ``x``.
        w0: Intercept; a scalar or an array.
        w1: Slope; a scalar or an array.

    Returns:
        A scalar when ``w0`` and ``w1`` are both scalars. Otherwise an array
        with the broadcast shape of ``w0`` and ``w1``, holding the error for
        every parameter combination.

    Raises:
        ValueError: If ``x`` and ``y`` are not one-dimensional sequences of
            equal length.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.ndim != 1 or x.shape != y.shape:
        raise ValueError("x and y must be one-dimensional and of equal length")

    w0_grid, w1_grid = np.broadcast_arrays(w0, w1)
    result = np.empty(w0_grid.shape, dtype=float)
    # Loop over parameter combinations only; the work over the samples is
    # vectorised. This avoids building a (parameters x samples) temporary,
    # which would be very large for a parameter grid.
    for idx in np.ndindex(w0_grid.shape):
        residuals = y - w0_grid[idx] - w1_grid[idx] * x
        result[idx] = np.dot(residuals, residuals)
    # Indexing with () unwraps a 0-d result into a scalar and leaves
    # higher-dimensional results as arrays.
    return result[()]
# Raw observations as markers, plus two straight lines with fixed
# intercept/slope pairs drawn over the same Weight values.
trace0 = go.Scatter(x=data.Weight, y=data.Height, mode='markers', name='Наше множество')
trace1 = go.Scatter(x=data.Weight, y=55 + 0.04 * data.Weight, name='прямая 1')
trace2 = go.Scatter(x=data.Weight, y=50 + 0.22 * data.Weight, name='прямая 2')

# Assemble the figure and render it inline without the plotly export link.
data1 = [trace0, trace1, trace2]
layout1 = dict(title='Две прямые')
fig1 = go.Figure(data=data1, layout=layout1)
iplot(fig1, show_link=False)
# Sweep the slope over 1000 evenly spaced values in [-1, 1] while the
# intercept stays fixed at 50, and plot the resulting error curve.
w1 = np.linspace(-1, 1, 1000)
slope_errors = error(data['Weight'].values, data['Height'].values, 50, w1)
trace3 = go.Scatter(x=w1, y=slope_errors, name='график')

data2 = [trace3]
layout2 = dict(title='Зависимость ошибки от параметра w1')
fig2 = go.Figure(data=data2, layout=layout2)
iplot(fig2, show_link=False)
# Step search for the slope that minimises the error with the intercept fixed
# at 50. Starting from x = -1 the slope is moved up by `eta` while that lowers
# the error; when a step no longer helps, `eta` is divided by 10. The search
# stops once `eta` drops to 1e-10 or below.
# NOTE: x is only ever increased, so a minimum below the start is never found.
weights_arr = data.Weight.values
heights_arr = data.Height.values

eta = 1
x = -1
# Seed with the error at the starting point, so the reported minimum is always
# an error that was actually evaluated.
minerr = error(weights_arr, heights_arr, 50, x)
while eta > math.pow(10, -10):
    # Evaluate the candidate once and reuse the value if the step is accepted.
    candidate_err = error(weights_arr, heights_arr, 50, x + eta)
    if candidate_err < minerr:
        x += eta
        minerr = candidate_err
    else:
        eta = eta / 10
print(x, minerr)
# Line with intercept 50 and the slope `x` found by the search above, added to
# the earlier scatter and the two candidate lines.
# The legend label previously had a typo ('мимимум').
trace4 = go.Scatter(
    x=data.Weight,
    y=50 + x * data.Weight,
    name='минимум',
)

# Rebuild the figure with all four traces and render it inline.
data1 = [trace0, trace1, trace2, trace4]
layout1 = {'title': 'Три прямые'}
fig1 = go.Figure(data=data1, layout=layout1)
iplot(fig1, show_link=False)
# Evaluate the error on a grid of intercepts (w0) and slopes (w1) and draw it
# as a 3-D surface.
w0 = np.linspace(-100, 100, 50)
w1 = np.linspace(-5, 5, 50)

# z[i, j] is the error for intercept w0[i] and slope w1[j]. `error` works
# element-wise on its parameters, so one call with a column of intercepts and
# a row of slopes fills the whole grid.
z = error(
    data.Weight.values,
    data.Height.values,
    w0[:, np.newaxis],
    w1[np.newaxis, :],
)

# plotly's Surface reads z[row][col] as the value at (x[col], y[row]), so the
# grid is transposed to put w0 on the x axis and w1 on the y axis.
data6 = [go.Surface(x=w0, y=w1, z=z.T)]
layout = go.Layout(
    title='Искомая плоскость',
    autosize=False,
    width=600,
    height=600,
)
fig = go.Figure(data=data6, layout=layout)
iplot(fig, show_link=False)
def err(x):
    """Return the sum of squared residuals for parameters ``x``.

    Args:
        x: Two-element sequence ``(intercept, slope)``.

    Returns:
        The sum over all rows of ``data`` of
        ``(Height - x[0] - x[1] * Weight) ** 2`` as a float.
    """
    heights = data.Height.values
    weights = data.Weight.values
    # One vectorised pass over the columns instead of a Python-level loop.
    residuals = heights - x[0] - x[1] * weights
    return float(np.dot(residuals, residuals))


# Minimise over both parameters at once, starting from (0, 0) and keeping the
# intercept within [-100, 100] and the slope within [-5, 5].
result = optimize.minimize(err, x0=[0, 0], method='L-BFGS-B', bounds=((-100, 100), (-5, 5)))
print(result)